1 package org.apache.lucene.util.automaton;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 import java.util.Arrays;
24 import java.util.BitSet;
25 import java.util.Collection;
26 import java.util.Collections;
27 import java.util.HashSet;
28 import java.util.Set;
29
30 import org.apache.lucene.util.Accountable;
31 import org.apache.lucene.util.ArrayUtil;
32 import org.apache.lucene.util.InPlaceMergeSorter;
33 import org.apache.lucene.util.RamUsageEstimator;
34 import org.apache.lucene.util.Sorter;
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57 public class Automaton implements Accountable {
58
59
60
61
62 private int nextState;
63
64
65
66
67 private int nextTransition;
68
69
70
71
72 private int curState = -1;
73
74
75
76
77
78 private int[] states;
79
80 private final BitSet isAccept;
81
82
83 private int[] transitions;
84
85
86 private boolean deterministic = true;
87
88
89 public Automaton() {
90 this(2, 2);
91 }
92
93
94
95
96
97
98
99
100
101
102 public Automaton(int numStates, int numTransitions) {
103 states = new int[numStates * 2];
104 isAccept = new BitSet(numStates);
105 transitions = new int[numTransitions * 3];
106 }
107
108
109 public int createState() {
110 growStates();
111 int state = nextState/2;
112 states[nextState] = -1;
113 nextState += 2;
114 return state;
115 }
116
117
118 public void setAccept(int state, boolean accept) {
119 if (state >= getNumStates()) {
120 throw new IllegalArgumentException("state=" + state + " is out of bounds (numStates=" + getNumStates() + ")");
121 }
122 if (accept) {
123 isAccept.set(state);
124 } else {
125 isAccept.clear(state);
126 }
127 }
128
129
130
131 public Transition[][] getSortedTransitions() {
132 int numStates = getNumStates();
133 Transition[][] transitions = new Transition[numStates][];
134 for(int s=0;s<numStates;s++) {
135 int numTransitions = getNumTransitions(s);
136 transitions[s] = new Transition[numTransitions];
137 for(int t=0;t<numTransitions;t++) {
138 Transition transition = new Transition();
139 getTransition(s, t, transition);
140 transitions[s][t] = transition;
141 }
142 }
143
144 return transitions;
145 }
146
147
148 BitSet getAcceptStates() {
149 return isAccept;
150 }
151
152
153 public boolean isAccept(int state) {
154 return isAccept.get(state);
155 }
156
157
158 public void addTransition(int source, int dest, int label) {
159 addTransition(source, dest, label, label);
160 }
161
162
163 public void addTransition(int source, int dest, int min, int max) {
164 assert nextTransition%3 == 0;
165
166 if (source >= nextState/2) {
167 throw new IllegalArgumentException("source=" + source + " is out of bounds (maxState is " + (nextState/2-1) + ")");
168 }
169 if (dest >= nextState/2) {
170 throw new IllegalArgumentException("dest=" + dest + " is out of bounds (max state is " + (nextState/2-1) + ")");
171 }
172
173 growTransitions();
174 if (curState != source) {
175 if (curState != -1) {
176 finishCurrentState();
177 }
178
179
180 curState = source;
181 if (states[2*curState] != -1) {
182 throw new IllegalStateException("from state (" + source + ") already had transitions added");
183 }
184 assert states[2*curState+1] == 0;
185 states[2*curState] = nextTransition;
186 }
187
188 transitions[nextTransition++] = dest;
189 transitions[nextTransition++] = min;
190 transitions[nextTransition++] = max;
191
192
193 states[2*curState+1]++;
194 }
195
196
197
198
199 public void addEpsilon(int source, int dest) {
200 Transition t = new Transition();
201 int count = initTransition(dest, t);
202 for(int i=0;i<count;i++) {
203 getNextTransition(t);
204 addTransition(source, t.dest, t.min, t.max);
205 }
206 if (isAccept(dest)) {
207 setAccept(source, true);
208 }
209 }
210
211
212
213 public void copy(Automaton other) {
214
215
216 int stateOffset = getNumStates();
217 states = ArrayUtil.grow(states, nextState + other.nextState);
218 System.arraycopy(other.states, 0, states, nextState, other.nextState);
219 for(int i=0;i<other.nextState;i += 2) {
220 if (states[nextState+i] != -1) {
221 states[nextState+i] += nextTransition;
222 }
223 }
224 nextState += other.nextState;
225 int otherNumStates = other.getNumStates();
226 BitSet otherAcceptStates = other.getAcceptStates();
227 int state = 0;
228 while (state < otherNumStates && (state = otherAcceptStates.nextSetBit(state)) != -1) {
229 setAccept(stateOffset + state, true);
230 state++;
231 }
232
233
234 transitions = ArrayUtil.grow(transitions, nextTransition + other.nextTransition);
235 System.arraycopy(other.transitions, 0, transitions, nextTransition, other.nextTransition);
236 for(int i=0;i<other.nextTransition;i += 3) {
237 transitions[nextTransition+i] += stateOffset;
238 }
239 nextTransition += other.nextTransition;
240
241 if (other.deterministic == false) {
242 deterministic = false;
243 }
244 }
245
246
247 private void finishCurrentState() {
248 int numTransitions = states[2*curState+1];
249 assert numTransitions > 0;
250
251 int offset = states[2*curState];
252 int start = offset/3;
253 destMinMaxSorter.sort(start, start+numTransitions);
254
255
256 int upto = 0;
257 int min = -1;
258 int max = -1;
259 int dest = -1;
260
261 for(int i=0;i<numTransitions;i++) {
262 int tDest = transitions[offset+3*i];
263 int tMin = transitions[offset+3*i+1];
264 int tMax = transitions[offset+3*i+2];
265
266 if (dest == tDest) {
267 if (tMin <= max+1) {
268 if (tMax > max) {
269 max = tMax;
270 }
271 } else {
272 if (dest != -1) {
273 transitions[offset+3*upto] = dest;
274 transitions[offset+3*upto+1] = min;
275 transitions[offset+3*upto+2] = max;
276 upto++;
277 }
278 min = tMin;
279 max = tMax;
280 }
281 } else {
282 if (dest != -1) {
283 transitions[offset+3*upto] = dest;
284 transitions[offset+3*upto+1] = min;
285 transitions[offset+3*upto+2] = max;
286 upto++;
287 }
288 dest = tDest;
289 min = tMin;
290 max = tMax;
291 }
292 }
293
294 if (dest != -1) {
295
296 transitions[offset+3*upto] = dest;
297 transitions[offset+3*upto+1] = min;
298 transitions[offset+3*upto+2] = max;
299 upto++;
300 }
301
302 nextTransition -= (numTransitions-upto)*3;
303 states[2*curState+1] = upto;
304
305
306 minMaxDestSorter.sort(start, start+upto);
307
308 if (deterministic && upto > 1) {
309 int lastMax = transitions[offset+2];
310 for(int i=1;i<upto;i++) {
311 min = transitions[offset + 3*i + 1];
312 if (min <= lastMax) {
313 deterministic = false;
314 break;
315 }
316 lastMax = transitions[offset + 3*i + 2];
317 }
318 }
319 }
320
321
322
323 public boolean isDeterministic() {
324 return deterministic;
325 }
326
327
328
329
330
331 public void finishState() {
332 if (curState != -1) {
333 finishCurrentState();
334 curState = -1;
335 }
336 }
337
338
339
340
341 public int getNumStates() {
342 return nextState/2;
343 }
344
345
346 public int getNumTransitions() {
347 return nextTransition / 3;
348 }
349
350
351 public int getNumTransitions(int state) {
352 assert state >= 0;
353 int count = states[2*state+1];
354 if (count == -1) {
355 return 0;
356 } else {
357 return count;
358 }
359 }
360
361 private void growStates() {
362 if (nextState+2 >= states.length) {
363 states = ArrayUtil.grow(states, nextState+2);
364 }
365 }
366
367 private void growTransitions() {
368 if (nextTransition+3 >= transitions.length) {
369 transitions = ArrayUtil.grow(transitions, nextTransition+3);
370 }
371 }
372
373
374 private final Sorter destMinMaxSorter = new InPlaceMergeSorter() {
375
376 private void swapOne(int i, int j) {
377 int x = transitions[i];
378 transitions[i] = transitions[j];
379 transitions[j] = x;
380 }
381
382 @Override
383 protected void swap(int i, int j) {
384 int iStart = 3*i;
385 int jStart = 3*j;
386 swapOne(iStart, jStart);
387 swapOne(iStart+1, jStart+1);
388 swapOne(iStart+2, jStart+2);
389 };
390
391 @Override
392 protected int compare(int i, int j) {
393 int iStart = 3*i;
394 int jStart = 3*j;
395
396
397 int iDest = transitions[iStart];
398 int jDest = transitions[jStart];
399 if (iDest < jDest) {
400 return -1;
401 } else if (iDest > jDest) {
402 return 1;
403 }
404
405
406 int iMin = transitions[iStart+1];
407 int jMin = transitions[jStart+1];
408 if (iMin < jMin) {
409 return -1;
410 } else if (iMin > jMin) {
411 return 1;
412 }
413
414
415 int iMax = transitions[iStart+2];
416 int jMax = transitions[jStart+2];
417 if (iMax < jMax) {
418 return -1;
419 } else if (iMax > jMax) {
420 return 1;
421 }
422
423 return 0;
424 }
425 };
426
427
428 private final Sorter minMaxDestSorter = new InPlaceMergeSorter() {
429
430 private void swapOne(int i, int j) {
431 int x = transitions[i];
432 transitions[i] = transitions[j];
433 transitions[j] = x;
434 }
435
436 @Override
437 protected void swap(int i, int j) {
438 int iStart = 3*i;
439 int jStart = 3*j;
440 swapOne(iStart, jStart);
441 swapOne(iStart+1, jStart+1);
442 swapOne(iStart+2, jStart+2);
443 };
444
445 @Override
446 protected int compare(int i, int j) {
447 int iStart = 3*i;
448 int jStart = 3*j;
449
450
451 int iMin = transitions[iStart+1];
452 int jMin = transitions[jStart+1];
453 if (iMin < jMin) {
454 return -1;
455 } else if (iMin > jMin) {
456 return 1;
457 }
458
459
460 int iMax = transitions[iStart+2];
461 int jMax = transitions[jStart+2];
462 if (iMax < jMax) {
463 return -1;
464 } else if (iMax > jMax) {
465 return 1;
466 }
467
468
469 int iDest = transitions[iStart];
470 int jDest = transitions[jStart];
471 if (iDest < jDest) {
472 return -1;
473 } else if (iDest > jDest) {
474 return 1;
475 }
476
477 return 0;
478 }
479 };
480
481
482
483
484
485 public int initTransition(int state, Transition t) {
486 assert state < nextState/2: "state=" + state + " nextState=" + nextState;
487 t.source = state;
488 t.transitionUpto = states[2*state];
489 return getNumTransitions(state);
490 }
491
492
493 public void getNextTransition(Transition t) {
494
495 assert (t.transitionUpto+3 - states[2*t.source]) <= 3*states[2*t.source+1];
496
497
498 assert transitionSorted(t);
499
500 t.dest = transitions[t.transitionUpto++];
501 t.min = transitions[t.transitionUpto++];
502 t.max = transitions[t.transitionUpto++];
503 }
504
505 private boolean transitionSorted(Transition t) {
506
507 int upto = t.transitionUpto;
508 if (upto == states[2*t.source]) {
509
510 return true;
511 }
512
513 int nextDest = transitions[upto];
514 int nextMin = transitions[upto+1];
515 int nextMax = transitions[upto+2];
516 if (nextMin > t.min) {
517 return true;
518 } else if (nextMin < t.min) {
519 return false;
520 }
521
522
523 if (nextMax > t.max) {
524 return true;
525 } else if (nextMax < t.max) {
526 return false;
527 }
528
529
530 if (nextDest > t.dest) {
531 return true;
532 } else if (nextDest < t.dest) {
533 return false;
534 }
535
536
537 return false;
538 }
539
540
541
542 public void getTransition(int state, int index, Transition t) {
543 int i = states[2*state] + 3*index;
544 t.source = state;
545 t.dest = transitions[i++];
546 t.min = transitions[i++];
547 t.max = transitions[i++];
548 }
549
550 static void appendCharString(int c, StringBuilder b) {
551 if (c >= 0x21 && c <= 0x7e && c != '\\' && c != '"') b.appendCodePoint(c);
552 else {
553 b.append("\\\\U");
554 String s = Integer.toHexString(c);
555 if (c < 0x10) b.append("0000000").append(s);
556 else if (c < 0x100) b.append("000000").append(s);
557 else if (c < 0x1000) b.append("00000").append(s);
558 else if (c < 0x10000) b.append("0000").append(s);
559 else if (c < 0x100000) b.append("000").append(s);
560 else if (c < 0x1000000) b.append("00").append(s);
561 else if (c < 0x10000000) b.append("0").append(s);
562 else b.append(s);
563 }
564 }
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583 public String toDot() {
584
585
586 StringBuilder b = new StringBuilder();
587 b.append("digraph Automaton {\n");
588 b.append(" rankdir = LR\n");
589 final int numStates = getNumStates();
590 if (numStates > 0) {
591 b.append(" initial [shape=plaintext,label=\"0\"]\n");
592 b.append(" initial -> 0\n");
593 }
594
595 Transition t = new Transition();
596
597 for(int state=0;state<numStates;state++) {
598 b.append(" ");
599 b.append(state);
600 if (isAccept(state)) {
601 b.append(" [shape=doublecircle,label=\"" + state + "\"]\n");
602 } else {
603 b.append(" [shape=circle,label=\"" + state + "\"]\n");
604 }
605 int numTransitions = initTransition(state, t);
606
607 for(int i=0;i<numTransitions;i++) {
608 getNextTransition(t);
609
610 assert t.max >= t.min;
611 b.append(" ");
612 b.append(state);
613 b.append(" -> ");
614 b.append(t.dest);
615 b.append(" [label=\"");
616 appendCharString(t.min, b);
617 if (t.max != t.min) {
618 b.append('-');
619 appendCharString(t.max, b);
620 }
621 b.append("\"]\n");
622
623 }
624 }
625 b.append('}');
626 return b.toString();
627 }
628
629
630
631
632 int[] getStartPoints() {
633 Set<Integer> pointset = new HashSet<>();
634 pointset.add(Character.MIN_CODE_POINT);
635
636 for (int s=0;s<nextState;s+=2) {
637 int trans = states[s];
638 int limit = trans+3*states[s+1];
639
640 while (trans < limit) {
641 int min = transitions[trans+1];
642 int max = transitions[trans+2];
643
644 pointset.add(min);
645 if (max < Character.MAX_CODE_POINT) {
646 pointset.add(max + 1);
647 }
648 trans += 3;
649 }
650 }
651 int[] points = new int[pointset.size()];
652 int n = 0;
653 for (Integer m : pointset) {
654 points[n++] = m;
655 }
656 Arrays.sort(points);
657 return points;
658 }
659
660
661
662
663
664
665
666
667 public int step(int state, int label) {
668 assert state >= 0;
669 assert label >= 0;
670 int trans = states[2*state];
671 int limit = trans + 3*states[2*state+1];
672
673 while (trans < limit) {
674 int dest = transitions[trans];
675 int min = transitions[trans+1];
676 int max = transitions[trans+2];
677 if (min <= label && label <= max) {
678 return dest;
679 }
680 trans += 3;
681 }
682
683 return -1;
684 }
685
686
687
688
689
690
691 public static class Builder {
692 private int nextState = 0;
693 private final BitSet isAccept;
694 private int[] transitions;
695 private int nextTransition = 0;
696
697
698 public Builder() {
699 this(16, 16);
700 }
701
702
703
704
705
706
707
708
709
710
711 public Builder(int numStates, int numTransitions) {
712 isAccept = new BitSet(numStates);
713 transitions = new int[numTransitions * 4];
714 }
715
716
717 public void addTransition(int source, int dest, int label) {
718 addTransition(source, dest, label, label);
719 }
720
721
722 public void addTransition(int source, int dest, int min, int max) {
723 if (transitions.length < nextTransition+4) {
724 transitions = ArrayUtil.grow(transitions, nextTransition+4);
725 }
726 transitions[nextTransition++] = source;
727 transitions[nextTransition++] = dest;
728 transitions[nextTransition++] = min;
729 transitions[nextTransition++] = max;
730 }
731
732
733
734
735 public void addEpsilon(int source, int dest) {
736 for (int upto = 0; upto < nextTransition; upto += 4) {
737 if (transitions[upto] == dest) {
738 addTransition(source, transitions[upto + 1], transitions[upto + 2], transitions[upto + 3]);
739 }
740 }
741 if (isAccept(dest)) {
742 setAccept(source, true);
743 }
744 }
745
746
747
748 private final Sorter sorter = new InPlaceMergeSorter() {
749
750 private void swapOne(int i, int j) {
751 int x = transitions[i];
752 transitions[i] = transitions[j];
753 transitions[j] = x;
754 }
755
756 @Override
757 protected void swap(int i, int j) {
758 int iStart = 4*i;
759 int jStart = 4*j;
760 swapOne(iStart, jStart);
761 swapOne(iStart+1, jStart+1);
762 swapOne(iStart+2, jStart+2);
763 swapOne(iStart+3, jStart+3);
764 };
765
766 @Override
767 protected int compare(int i, int j) {
768 int iStart = 4*i;
769 int jStart = 4*j;
770
771
772 int iSrc = transitions[iStart];
773 int jSrc = transitions[jStart];
774 if (iSrc < jSrc) {
775 return -1;
776 } else if (iSrc > jSrc) {
777 return 1;
778 }
779
780
781 int iMin = transitions[iStart+2];
782 int jMin = transitions[jStart+2];
783 if (iMin < jMin) {
784 return -1;
785 } else if (iMin > jMin) {
786 return 1;
787 }
788
789
790 int iMax = transitions[iStart+3];
791 int jMax = transitions[jStart+3];
792 if (iMax < jMax) {
793 return -1;
794 } else if (iMax > jMax) {
795 return 1;
796 }
797
798
799 int iDest = transitions[iStart+1];
800 int jDest = transitions[jStart+1];
801 if (iDest < jDest) {
802 return -1;
803 } else if (iDest > jDest) {
804 return 1;
805 }
806
807 return 0;
808 }
809 };
810
811
812
813 public Automaton finish() {
814
815 int numStates = nextState;
816 int numTransitions = nextTransition / 4;
817 Automaton a = new Automaton(numStates, numTransitions);
818
819
820 for (int state = 0; state < numStates; state++) {
821 a.createState();
822 a.setAccept(state, isAccept(state));
823 }
824
825
826 sorter.sort(0, numTransitions);
827 for (int upto = 0; upto < nextTransition; upto += 4) {
828 a.addTransition(transitions[upto],
829 transitions[upto+1],
830 transitions[upto+2],
831 transitions[upto+3]);
832 }
833
834 a.finishState();
835
836 return a;
837 }
838
839
840 public int createState() {
841 return nextState++;
842 }
843
844
845 public void setAccept(int state, boolean accept) {
846 if (state >= getNumStates()) {
847 throw new IllegalArgumentException("state=" + state + " is out of bounds (numStates=" + getNumStates() + ")");
848 }
849
850 this.isAccept.set(state, accept);
851 }
852
853
854 public boolean isAccept(int state) {
855 return this.isAccept.get(state);
856 }
857
858
859 public int getNumStates() {
860 return nextState;
861 }
862
863
864 public void copy(Automaton other) {
865 int offset = getNumStates();
866 int otherNumStates = other.getNumStates();
867
868
869 copyStates(other);
870
871
872 Transition t = new Transition();
873 for(int s=0;s<otherNumStates;s++) {
874 int count = other.initTransition(s, t);
875 for(int i=0;i<count;i++) {
876 other.getNextTransition(t);
877 addTransition(offset + s, offset + t.dest, t.min, t.max);
878 }
879 }
880 }
881
882
883 public void copyStates(Automaton other) {
884 int otherNumStates = other.getNumStates();
885 for (int s = 0; s < otherNumStates; s++) {
886 int newState = createState();
887 setAccept(newState, other.isAccept(s));
888 }
889 }
890 }
891
892 @Override
893 public long ramBytesUsed() {
894
895 return RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.sizeOf(states) + RamUsageEstimator.sizeOf(transitions) +
896 RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + (isAccept.size() / 8) + RamUsageEstimator.NUM_BYTES_OBJECT_REF +
897 2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF +
898 3 * RamUsageEstimator.NUM_BYTES_INT +
899 RamUsageEstimator.NUM_BYTES_BOOLEAN;
900 }
901
902 @Override
903 public Collection<Accountable> getChildResources() {
904 return Collections.emptyList();
905 }
906 }